import requests
import codecs
from bs4 import BeautifulSoup

def get_wechat_article(url, filepath):
	"""Download a WeChat article page and save its HTML to a file.

	Args:
		url: Address of the article to download. If empty or None, the
			article this function originally had hard-coded is fetched, so
			callers that passed a placeholder keep working.
		filepath: Path of the file to write. It is created or overwritten
			and written as UTF-8.

	Raises:
		requests.exceptions.RequestException: if the request fails, times
			out, or the server answers with an HTTP error status.
	"""
	# Fallback keeps the previous behaviour for callers that passed '' while
	# the parameter was still being ignored.
	default_url = 'https://mp.weixin.qq.com/s/1PU9eqAk82luiP7clut6kw'
	if not url:
		url = default_url

	# Browser-like headers. "accept-encoding" is deliberately not overridden:
	# requests advertises only the encodings it can actually decode, whereas
	# claiming "br" without a Brotli decoder installed can yield a body that
	# response.text cannot decode.
	headers = {
		"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36",
		"accept": "*/*",
		"accept-language": "zh-CN,zh;q=0.9",
	}

	# requests never times out by default; bound the wait so a stalled
	# server cannot hang the caller forever.
	response = requests.get(url, headers=headers, timeout=30)
	# Fail loudly instead of saving an HTTP error page as if it were the article.
	response.raise_for_status()

	with codecs.open(filepath, 'w', 'utf-8') as f:
		f.write(response.text)

def parse(filepath):
	"""Extract title, date, source and body from a saved article HTML file.

	The file is read as UTF-8 and parsed with BeautifulSoup's "html.parser".
	The elements are located by their ``id`` attributes: 'activity-name'
	(title), 'post-date' (date), 'post-user' (source) and 'js_content' (body).

	Args:
		filepath: Path of the HTML file to read.

	Returns:
		A 4-tuple ``(title, date, source, detail)``. The first three are the
		whitespace-stripped first child of their element; ``detail`` is the
		full markup of the body element as a string, with every occurrence
		of 'data-src' replaced by 'src'.

	Raises:
		IndexError: if one of the four elements is absent from the page, or
			a title/date/source element has no children.
	"""
	# The context manager closes the file even when read() raises, which the
	# previous manual open()/close() pair did not guarantee.
	with codecs.open(filepath, 'r', encoding='utf-8') as f:
		html = f.read()

	bs = BeautifulSoup(html, "html.parser")
	article_title = bs.find_all(id='activity-name')[0].contents[0]
	article_date = bs.find_all(id='post-date')[0].contents[0]
	article_source = bs.find_all(id='post-user')[0].contents[0]

	article_detail = str(bs.find_all(id='js_content')[0])
	# Plain textual replacement over the serialized markup, so it also touches
	# any 'data-src' that appears in visible text.
	# NOTE(review): presumably this turns lazy-load image attributes into
	# real 'src' attributes so images display - confirm.
	article_detail = article_detail.replace('data-src', 'src')

	return article_title.strip(), article_date.strip(), article_source.strip(), article_detail

if __name__ == '__main__':
	# parse() requires the path of a saved article page; calling it with no
	# argument raised TypeError. 'data.html' is the file name this script
	# used for the saved page; create it first with
	# get_wechat_article(<article url>, 'data.html') if it does not exist.
	parse('data.html')